(in-package #:sb-simd-avx)

(define-instruction-set :avx
  (:test (avx-supported-p))
  (:include :x86-64)
  ;; Scalar types declared directly by this instruction set.  Each row appears
  ;; to be
  ;;   (name width-in-bits lisp-type primitive-type (storage-classes...))
  ;; The exact meaning of each field is fixed by DEFINE-INSTRUCTION-SET, which
  ;; is not visible in this file -- TODO confirm.
  (:scalars
   (f32 32 single-float #:single-float (#:single-reg))
   (f64 64 double-float #:double-float (#:double-reg)))
  ;; SIMD pack types: the 128-bit rows come first, then the 256-bit rows.
  ;; Each row appears to be
  ;;   (name element-type total-bits primitive-type (storage-classes...))
  ;; -- TODO confirm against DEFINE-INSTRUCTION-SET, which is not visible here.
  ;; In the typed rows the number after the dot in the name, multiplied by the
  ;; element width, gives the total width (e.g. f32.4 = 4 x 32 = 128).
  ;; NOTE(review): the integer element types (u8 ... s64) are not declared in
  ;; the :scalars clause of this form; presumably they come from the included
  ;; :x86-64 instruction set -- verify.
  (:simd-packs
   ;; 128-bit packs.  p128 has NIL in the element-type position and is the
   ;; only 128-bit row that lists all three SSE register classes.
   (p128   nil 128 #:simd-pack (#:int-sse-reg #:double-sse-reg #:single-sse-reg))
   (f32.4  f32 128 #:simd-pack-single (#:single-sse-reg))
   (f64.2  f64 128 #:simd-pack-double (#:double-sse-reg))
   (u8.16  u8  128 #:simd-pack-ub8  (#:int-sse-reg))
   (u16.8  u16 128 #:simd-pack-ub16 (#:int-sse-reg))
   (u32.4  u32 128 #:simd-pack-ub32 (#:int-sse-reg))
   (u64.2  u64 128 #:simd-pack-ub64 (#:int-sse-reg))
   (s8.16  s8  128 #:simd-pack-sb8  (#:int-sse-reg))
   (s16.8  s16 128 #:simd-pack-sb16 (#:int-sse-reg))
   (s32.4  s32 128 #:simd-pack-sb32 (#:int-sse-reg))
   (s64.2  s64 128 #:simd-pack-sb64 (#:int-sse-reg))
   ;; 256-bit packs.  p256 mirrors p128: NIL element type and all three
   ;; 256-bit register classes.
   (p256   nil 256 #:simd-pack-256 (#:int-avx2-reg #:double-avx2-reg #:single-avx2-reg))
   (f32.8  f32 256 #:simd-pack-256-single (#:single-avx2-reg))
   (f64.4  f64 256 #:simd-pack-256-double (#:double-avx2-reg))
   (u8.32  u8  256 #:simd-pack-256-ub8  (#:int-avx2-reg))
   (u16.16 u16 256 #:simd-pack-256-ub16 (#:int-avx2-reg))
   (u32.8  u32 256 #:simd-pack-256-ub32 (#:int-avx2-reg))
   (u64.4  u64 256 #:simd-pack-256-ub64 (#:int-avx2-reg))
   (s8.32  s8  256 #:simd-pack-256-sb8  (#:int-avx2-reg))
   (s16.16 s16 256 #:simd-pack-256-sb16 (#:int-avx2-reg))
   (s32.8  s32 256 #:simd-pack-256-sb32 (#:int-avx2-reg))
   (s64.4  s64 256 #:simd-pack-256-sb64 (#:int-avx2-reg)))
  ;; One row per typed SIMD pack: (cast-name broadcast-name).  The cast name
  ;; is the same symbol as the pack type, and the second symbol is that pack's
  ;; -broadcast instruction from the :instructions clause.
  ;; NOTE(review): presumably the cast turns a scalar into a pack by way of
  ;; the listed broadcast -- confirm against DEFINE-INSTRUCTION-SET.
  ;; p128 and p256 have no row here.
  (:simd-casts
   ;; Floating-point packs.
   (f32.4 f32.4-broadcast)
   (f64.2 f64.2-broadcast)
   (f32.8 f32.8-broadcast)
   (f64.4 f64.4-broadcast)
   ;; Unsigned integer packs, 128-bit then 256-bit.
   (u8.16 u8.16-broadcast)
   (u16.8 u16.8-broadcast)
   (u32.4 u32.4-broadcast)
   (u64.2 u64.2-broadcast)
   (u8.32 u8.32-broadcast)
   (u16.16 u16.16-broadcast)
   (u32.8 u32.8-broadcast)
   (u64.4 u64.4-broadcast)
   ;; Signed integer packs, 128-bit then 256-bit.
   (s8.16 s8.16-broadcast)
   (s16.8 s16.8-broadcast)
   (s32.4 s32.4-broadcast)
   (s64.2 s64.2-broadcast)
   (s8.32 s8.32-broadcast)
   (s16.16 s16.16-broadcast)
   (s32.8 s32.8-broadcast)
   (s64.4 s64.4-broadcast))
  ;; Reinterpret casts.  Each row is (cast-name instruction...), where every
  ;; listed instruction is named <cast-name>-from-<source-type> and is
  ;; expected to have a matching row in the :instructions clause.
  ;; NOTE(review): presumably the cast dispatches on the argument's type to
  ;; one of the listed instructions -- confirm against DEFINE-INSTRUCTION-SET.
  (:reinterpret-casts
   ;; Scalar results: only the two untyped pack sources are listed.
   (f32! f32!-from-p128 f32!-from-p256)
   (f64! f64!-from-p128 f64!-from-p256)
   (u8!  u8!-from-p128  u8!-from-p256)
   (u16! u16!-from-p128 u16!-from-p256)
   (u32! u32!-from-p128 u32!-from-p256)
   (u64! u64!-from-p128 u64!-from-p256)
   ;; Pack results: every row lists all three sources, in the order
   ;; element scalar, p128, p256.  Leaving one out would make the cast unable
   ;; to accept packs of that width even though the instruction exists.
   (f32.4! f32.4!-from-f32 f32.4!-from-p128 f32.4!-from-p256)
   (f64.2! f64.2!-from-f64 f64.2!-from-p128 f64.2!-from-p256)
   (f32.8! f32.8!-from-f32 f32.8!-from-p128 f32.8!-from-p256)
   (f64.4! f64.4!-from-f64 f64.4!-from-p128 f64.4!-from-p256)
   (u8.16! u8.16!-from-u8 u8.16!-from-p128 u8.16!-from-p256)
   (u16.8! u16.8!-from-u16 u16.8!-from-p128 u16.8!-from-p256)
   (u32.4! u32.4!-from-u32 u32.4!-from-p128 u32.4!-from-p256)
   (u64.2! u64.2!-from-u64 u64.2!-from-p128 u64.2!-from-p256)
   (u8.32! u8.32!-from-u8 u8.32!-from-p128 u8.32!-from-p256)
   (u16.16! u16.16!-from-u16 u16.16!-from-p128 u16.16!-from-p256)
   (u32.8! u32.8!-from-u32 u32.8!-from-p128 u32.8!-from-p256)
   (u64.4! u64.4!-from-u64 u64.4!-from-p128 u64.4!-from-p256)
   (s8.16! s8.16!-from-s8 s8.16!-from-p128 s8.16!-from-p256)
   (s16.8! s16.8!-from-s16 s16.8!-from-p128 s16.8!-from-p256)
   (s32.4! s32.4!-from-s32 s32.4!-from-p128 s32.4!-from-p256)
   (s64.2! s64.2!-from-s64 s64.2!-from-p128 s64.2!-from-p256)
   (s8.32! s8.32!-from-s8 s8.32!-from-p128 s8.32!-from-p256)
   (s16.16! s16.16!-from-s16 s16.16!-from-p128 s16.16!-from-p256)
   (s32.8! s32.8!-from-s32 s32.8!-from-p128 s32.8!-from-p256)
   (s64.4! s64.4!-from-s64 s64.4!-from-p128 s64.4!-from-p256))
  (:instructions
   (vzeroupper          #:vzeroupper   ()      ()   :cost 1 :pure nil)
   (vzeroall            #:vzeroall     ()      ()   :cost 1 :pure nil)
   ;; f32
   (f32-from-f64        #:vcvtsd2ss    (f32) (f64)     :cost 5)
   (f32-from-s64        nil            (f32) (s64)     :cost 5 :encoding :custom)
   (f32!-from-p128      nil            (f32) (p128)    :cost 1 :encoding :custom :always-translatable nil)
   (f32!-from-p256      nil            (f32) (p256)    :cost 1 :encoding :custom :always-translatable nil)
   (two-arg-f32-and     #:vandps       (f32) (f32 f32) :cost 1 :associative t)
   (two-arg-f32-or      #:vorps        (f32) (f32 f32) :cost 1 :associative t)
   (two-arg-f32-xor     #:vxorps       (f32) (f32 f32) :cost 1 :associative t)
   (two-arg-f32-max     #:vmaxss       (f32) (f32 f32) :cost 1 :associative t)
   (two-arg-f32-min     #:vminss       (f32) (f32 f32) :cost 1 :associative t)
   (two-arg-f32+        #:vaddss       (f32) (f32 f32) :cost 1 :associative t)
   (two-arg-f32-        #:vsubss       (f32) (f32 f32) :cost 2)
   (two-arg-f32*        #:vmulss       (f32) (f32 f32) :cost 2 :associative t)
   (two-arg-f32/        #:vdivss       (f32) (f32 f32) :cost 8)
   (two-arg-f32=        #:vcmpss       (u32) (f32 f32) :cost 4 :encoding :custom :prefix '(:eq) :associative t)
   (two-arg-f32/=       #:vcmpss       (u32) (f32 f32) :cost 4 :encoding :custom :prefix '(:neq) :associative t)
   (two-arg-f32<        #:vcmpss       (u32) (f32 f32) :cost 4 :encoding :custom :prefix '(:lt))
   (two-arg-f32<=       #:vcmpss       (u32) (f32 f32) :cost 4 :encoding :custom :prefix '(:le))
   (two-arg-f32>        #:vcmpss       (u32) (f32 f32) :cost 4 :encoding :custom :prefix '(:nle))
   (two-arg-f32>=       #:vcmpss       (u32) (f32 f32) :cost 4 :encoding :custom :prefix '(:nlt))
   (f32-andc1           #:vandnps      (f32) (f32 f32) :cost 1)
   (f32-not             nil            (f32) (f32)     :cost 1 :encoding :fake-vop)
   (f32-reciprocal      #:vrcpss       (f32) (f32)     :cost 5)
   (f32-rsqrt           #:vrsqrtss     (f32) (f32)     :cost 5)
   (f32-sqrt            #:vsqrtss      (f32) (f32)     :cost 15)
   ;; f64
   (f64-from-f32        #:vcvtss2sd    (f64) (f32)     :cost 5)
   (f64-from-s64        nil            (f64) (s64)     :cost 5 :encoding :custom)
   (f64!-from-p128      nil            (f64) (p128)    :cost 1 :encoding :custom :always-translatable nil)
   (f64!-from-p256      nil            (f64) (p256)    :cost 1 :encoding :custom :always-translatable nil)
   (two-arg-f64-and     #:vandpd       (f64) (f64 f64) :cost 1 :associative t)
   (two-arg-f64-or      #:vorpd        (f64) (f64 f64) :cost 1 :associative t)
   (two-arg-f64-xor     #:vxorpd       (f64) (f64 f64) :cost 1 :associative t)
   (two-arg-f64-max     #:vmaxsd       (f64) (f64 f64) :cost 1 :associative t)
   (two-arg-f64-min     #:vminsd       (f64) (f64 f64) :cost 1 :associative t)
   (two-arg-f64+        #:vaddsd       (f64) (f64 f64) :cost 1 :associative t)
   (two-arg-f64-        #:vsubsd       (f64) (f64 f64) :cost 2)
   (two-arg-f64*        #:vmulsd       (f64) (f64 f64) :cost 2 :associative t)
   (two-arg-f64/        #:vdivsd       (f64) (f64 f64) :cost 8)
   (two-arg-f64=        #:vcmpsd       (u64) (f64 f64) :cost 4 :encoding :custom :prefix '(:eq) :associative t)
   (two-arg-f64/=       #:vcmpsd       (u64) (f64 f64) :cost 4 :encoding :custom :prefix '(:neq) :associative t)
   (two-arg-f64<        #:vcmpsd       (u64) (f64 f64) :cost 4 :encoding :custom :prefix '(:lt))
   (two-arg-f64<=       #:vcmpsd       (u64) (f64 f64) :cost 4 :encoding :custom :prefix '(:le))
   (two-arg-f64>        #:vcmpsd       (u64) (f64 f64) :cost 4 :encoding :custom :prefix '(:nle))
   (two-arg-f64>=       #:vcmpsd       (u64) (f64 f64) :cost 4 :encoding :custom :prefix '(:nlt))
   (f64-andc1           #:vandnpd      (f64) (f64 f64) :cost 1)
   (f64-not             nil            (f64) (f64)     :cost 1 :encoding :fake-vop)
   (f64-sqrt            #:vsqrtsd      (f64) (f64)     :cost 15)
   ;; scalar reinterpret casts
   ( u8!-from-p128      nil            (u8)  (p128) :cost 1 :encoding :fake-vop)
   ( u8!-from-p256      nil            (u8)  (p256) :cost 1 :encoding :fake-vop)
   (u16!-from-p128      nil            (u16) (p128) :cost 1 :encoding :fake-vop)
   (u16!-from-p256      nil            (u16) (p256) :cost 1 :encoding :fake-vop)
   (u32!-from-p128      nil            (u32) (p128) :cost 1 :encoding :fake-vop)
   (u32!-from-p256      nil            (u32) (p256) :cost 1 :encoding :fake-vop)
   (u64!-from-p128      #:movq         (u64) (p128) :cost 1 :always-translatable nil)
   (u64!-from-p256      #:movq         (u64) (p256) :cost 1 :always-translatable nil)
   ;; f32.4
   (f32.4-from-s32.4    #:vcvtdq2ps    (f32.4) (s32.4)       :cost 5)
   (f32.4!-from-f32     #:vmovups      (f32.4) (f32)         :cost 1 :encoding :move)
   (f32.4!-from-p128    #:vmovups      (f32.4) (p128)        :cost 1 :encoding :move :always-translatable nil)
   (f32.4!-from-p256    #:vextractf128 (f32.4) (p256)        :cost 1 :suffix '(0) :always-translatable nil)
   (make-f32.4          nil            (f32.4) (f32 f32 f32 f32) :cost 1 :encoding :fake-vop)
   (f32.4-values        nil            (f32 f32 f32 f32) (f32.4) :cost 1 :encoding :fake-vop)
   (f32.4-broadcast     #:vbroadcastss (f32.4) (f32)         :cost 1)
   (f32.4-from-f64.4    #:vcvtpd2ps    (f32.4) (f64.4)       :cost 5)
   (f32.4-blend         #:vblendvps    (f32.4) (f32.4 f32.4 u32.4) :cost 1)
   (f32.4-blendc        #:vblendps     (f32.4) (f32.4 f32.4 imm4) :cost 1)
   (two-arg-f32.4-and   #:vandps       (f32.4) (f32.4 f32.4) :cost 1 :associative t)
   (two-arg-f32.4-or    #:vorps        (f32.4) (f32.4 f32.4) :cost 1 :associative t)
   (two-arg-f32.4-xor   #:vxorps       (f32.4) (f32.4 f32.4) :cost 1 :associative t)
   (two-arg-f32.4-max   #:vmaxps       (f32.4) (f32.4 f32.4) :cost 3 :associative t)
   (two-arg-f32.4-min   #:vminps       (f32.4) (f32.4 f32.4) :cost 3 :associative t)
   (f32.4-andc1         #:vandnps      (f32.4) (f32.4 f32.4) :cost 1)
   (f32.4-not           nil            (f32.4) (f32.4)       :cost 1 :encoding :fake-vop)
   (two-arg-f32.4+      #:vaddps       (f32.4) (f32.4 f32.4) :cost 2 :associative t)
   (two-arg-f32.4-      #:vsubps       (f32.4) (f32.4 f32.4) :cost 2)
   (two-arg-f32.4*      #:vmulps       (f32.4) (f32.4 f32.4) :cost 2 :associative t)
   (two-arg-f32.4/      #:vdivps       (f32.4) (f32.4 f32.4) :cost 8)
   (two-arg-f32.4=      #:vcmpps       (u32.4) (f32.4 f32.4) :cost 4 :prefix '(:eq) :associative t)
   (two-arg-f32.4/=     #:vcmpps       (u32.4) (f32.4 f32.4) :cost 4 :prefix '(:neq) :associative t)
   (two-arg-f32.4<      #:vcmpps       (u32.4) (f32.4 f32.4) :cost 4 :prefix '(:lt))
   (two-arg-f32.4<=     #:vcmpps       (u32.4) (f32.4 f32.4) :cost 4 :prefix '(:le))
   (two-arg-f32.4>      #:vcmpps       (u32.4) (f32.4 f32.4) :cost 4 :prefix '(:gt))
   (two-arg-f32.4>=     #:vcmpps       (u32.4) (f32.4 f32.4) :cost 4 :prefix '(:ge))
   (f32.4-horizontal-and nil           (f32)   (f32.4)       :cost 5 :encoding :fake-vop)
   (f32.4-horizontal-or  nil           (f32)   (f32.4)       :cost 5 :encoding :fake-vop)
   (f32.4-horizontal-xor nil           (f32)   (f32.4)       :cost 5 :encoding :fake-vop)
   (f32.4-horizontal-max nil           (f32)   (f32.4)       :cost 5 :encoding :fake-vop)
   (f32.4-horizontal-min nil           (f32)   (f32.4)       :cost 5 :encoding :fake-vop)
   (f32.4-horizontal+    nil           (f32)   (f32.4)       :cost 5 :encoding :fake-vop)
   (f32.4-horizontal*    nil           (f32)   (f32.4)       :cost 5 :encoding :fake-vop)
   (f32.4-dupeven       #:vmovsldup    (f32.4) (f32.4)       :cost 1)
   (f32.4-dupodd        #:vmovshdup    (f32.4) (f32.4)       :cost 1)
   (f32.4-addsub        #:vaddsubps    (f32.4) (f32.4 f32.4) :cost 3)
   (f32.4-hadd          #:vhaddps      (f32.4) (f32.4 f32.4) :cost 6)
   (f32.4-hsub          #:vhsubps      (f32.4) (f32.4 f32.4) :cost 6)
   (f32.4-reciprocal    #:vrcpps       (f32.4) (f32.4)       :cost 5)
   (f32.4-rsqrt         #:vrsqrtps     (f32.4) (f32.4)       :cost 5)
   (f32.4-sqrt          #:vsqrtps      (f32.4) (f32.4)       :cost 15)
   (f32.4-unpackhi      #:vunpckhps    (f32.4) (f32.4 f32.4) :cost 1)
   (f32.4-unpacklo      #:vunpcklps    (f32.4) (f32.4 f32.4) :cost 1)
   (f32.4-movemask      #:vmovmskps    (u4)    (f32.4)       :cost 1)
   (f32.4-%round        #:vroundps     (f32.4) (f32.4 imm3)  :cost 2)
   (f32.4-permute       #:vpermilps    (f32.4) (f32.4 imm8)  :cost 1)
   (f32.4-shuffle       #:vshufps      (f32.4) (f32.4 f32.4 imm8) :cost 1)
   (f32.4-movemask      #:vmovmskps    (u4)    (f32.4)       :cost 1)
   ;; f64.2
   (f64.2!-from-f64     #:vmovupd      (f64.2) (f64)         :cost 1 :encoding :move)
   (f64.2!-from-p128    #:vmovupd      (f64.2) (p128)        :cost 1 :encoding :move :always-translatable nil)
   (f64.2!-from-p256    #:vextractf128 (f64.2) (p256)        :cost 1 :suffix '(0) :always-translatable nil)
   (make-f64.2          nil            (f64.2) (f64 f64)     :cost 1 :encoding :fake-vop)
   (f64.2-values        nil            (f64 f64) (f64.2)     :cost 1 :encoding :fake-vop)
   (f64.2-broadcast     #:vmovddup     (f64.2) (f64)         :cost 1)
   (f64.2-blend         #:vblendvpd    (f64.2) (f64.2 f64.2 u64.2) :cost 1)
   (f64.2-blendc        #:vblendpd     (f64.2) (f64.2 f64.2 imm2) :cost 1)
   (two-arg-f64.2-and   #:vandpd       (f64.2) (f64.2 f64.2) :cost 1 :associative t)
   (two-arg-f64.2-or    #:vorpd        (f64.2) (f64.2 f64.2) :cost 1 :associative t)
   (two-arg-f64.2-xor   #:vxorpd       (f64.2) (f64.2 f64.2) :cost 1 :associative t)
   (two-arg-f64.2-max   #:vmaxpd       (f64.2) (f64.2 f64.2) :cost 3 :associative t)
   (two-arg-f64.2-min   #:vminpd       (f64.2) (f64.2 f64.2) :cost 3 :associative t)
   (two-arg-f64.2+      #:vaddpd       (f64.2) (f64.2 f64.2) :cost 2 :associative t)
   (f64.2-andc1         #:vandnpd      (f64.2) (f64.2 f64.2) :cost 1)
   (f64.2-not           nil            (f64.2) (f64.2)       :cost 1 :encoding :fake-vop)
   (two-arg-f64.2-      #:vsubpd       (f64.2) (f64.2 f64.2) :cost 2)
   (two-arg-f64.2*      #:vmulpd       (f64.2) (f64.2 f64.2) :cost 2 :associative t)
   (two-arg-f64.2/      #:vdivpd       (f64.2) (f64.2 f64.2) :cost 8)
   (two-arg-f64.2=      #:vcmppd       (u64.2) (f64.2 f64.2) :cost 4 :prefix '(:eq) :associative t)
   (two-arg-f64.2/=     #:vcmppd       (u64.2) (f64.2 f64.2) :cost 4 :prefix '(:neq) :associative t)
   (two-arg-f64.2<      #:vcmppd       (u64.2) (f64.2 f64.2) :cost 4 :prefix '(:lt))
   (two-arg-f64.2<=     #:vcmppd       (u64.2) (f64.2 f64.2) :cost 4 :prefix '(:le))
   (two-arg-f64.2>      #:vcmppd       (u64.2) (f64.2 f64.2) :cost 4 :prefix '(:gt))
   (two-arg-f64.2>=     #:vcmppd       (u64.2) (f64.2 f64.2) :cost 4 :prefix '(:ge))
   (f64.2-horizontal-and nil           (f64)   (f64.2)       :cost 3 :encoding :fake-vop)
   (f64.2-horizontal-or  nil           (f64)   (f64.2)       :cost 3 :encoding :fake-vop)
   (f64.2-horizontal-xor nil           (f64)   (f64.2)       :cost 3 :encoding :fake-vop)
   (f64.2-horizontal-max nil           (f64)   (f64.2)       :cost 3 :encoding :fake-vop)
   (f64.2-horizontal-min nil           (f64)   (f64.2)       :cost 3 :encoding :fake-vop)
   (f64.2-horizontal+    nil           (f64)   (f64.2)       :cost 3 :encoding :fake-vop)
   (f64.2-horizontal*    nil           (f64)   (f64.2)       :cost 3 :encoding :fake-vop)
   (f64.2-addsub        #:vaddsubpd    (f64.2) (f64.2 f64.2) :cost 3)
   (f64.2-hadd          #:vhaddpd      (f64.2) (f64.2 f64.2) :cost 6)
   (f64.2-hsub          #:vhsubpd      (f64.2) (f64.2 f64.2) :cost 6)
   (f64.2-sqrt          #:vsqrtpd      (f64.2) (f64.2)       :cost 20)
   (f64.2-unpackhi      #:vunpckhpd    (f64.2) (f64.2 f64.2) :cost 1)
   (f64.2-unpacklo      #:vunpcklpd    (f64.2) (f64.2 f64.2) :cost 1)
   (f64.2-movemask      #:vmovmskpd    (u2)    (f64.2)       :cost 1)
   (f64.2-%round        #:vroundpd     (f64.2) (f64.2 imm3)  :cost 2)
   (f64.2-permute       #:vpermilpd    (f64.2) (f64.2 imm2)  :cost 1)
   (f64.2-shuffle       #:vshufpd      (f64.2) (f64.2 f64.2 imm2) :cost 1)
   (f64.2-movemask      #:vmovmskpd    (u2)    (f64.2)       :cost 1)
   ;; f32.8
   (f32.8-from-s32.8    #:vcvtdq2ps    (f32.8) (s32.8)      :cost 5)
   (f32.8!-from-f32     #:vmovups      (f32.8) (f32)        :cost 1 :encoding :move)
   (f32.8!-from-p128    #:vmovups      (f32.8) (p128)       :cost 1 :encoding :move :always-translatable nil)
   (f32.8!-from-p256    #:vmovups      (f32.8) (p256)       :cost 1 :encoding :move :always-translatable nil)
   (make-f32.8          nil            (f32.8) (f32 f32 f32 f32 f32 f32 f32 f32) :cost 1 :encoding :fake-vop)
   (f32.8-values        nil            (f32 f32 f32 f32 f32 f32 f32 f32) (f32.8) :cost 1 :encoding :fake-vop)
   (f32.8-broadcast     #:vbroadcastss (f32.8) (f32)         :cost 1)
   (f32.8-from-u32.8    #:vcvtdq2ps    (f32.8) (u32.8)       :cost 5)
   (f32.8-blend         #:vblendvps    (f32.8) (f32.8 f32.8 u32.8) :cost 1)
   (f32.8-blendc        #:vblendps     (f32.8) (f32.8 f32.8 imm8) :cost 1)
   (two-arg-f32.8-and   #:vandps       (f32.8) (f32.8 f32.8) :cost 1 :associative t)
   (two-arg-f32.8-or    #:vorps        (f32.8) (f32.8 f32.8) :cost 1 :associative t)
   (two-arg-f32.8-xor   #:vxorps       (f32.8) (f32.8 f32.8) :cost 1 :associative t)
   (two-arg-f32.8-max   #:vmaxps       (f32.8) (f32.8 f32.8) :cost 3 :associative t)
   (two-arg-f32.8-min   #:vminps       (f32.8) (f32.8 f32.8) :cost 3 :associative t)
   (f32.8-andc1         #:vandnps      (f32.8) (f32.8 f32.8) :cost 1)
   (f32.8-not           nil            (f32.8) (f32.8)       :cost 1 :encoding :fake-vop)
   (two-arg-f32.8+      #:vaddps       (f32.8) (f32.8 f32.8) :cost 2 :associative t)
   (two-arg-f32.8-      #:vsubps       (f32.8) (f32.8 f32.8) :cost 2)
   (two-arg-f32.8*      #:vmulps       (f32.8) (f32.8 f32.8) :cost 2 :associative t)
   (two-arg-f32.8/      #:vdivps       (f32.8) (f32.8 f32.8) :cost 8)
   (two-arg-f32.8=      #:vcmpps       (u32.8) (f32.8 f32.8) :cost 4 :prefix '(:eq) :associative t)
   (two-arg-f32.8/=     #:vcmpps       (u32.8) (f32.8 f32.8) :cost 4 :prefix '(:neq) :associative t)
   (two-arg-f32.8<      #:vcmpps       (u32.8) (f32.8 f32.8) :cost 4 :prefix '(:lt))
   (two-arg-f32.8<=     #:vcmpps       (u32.8) (f32.8 f32.8) :cost 4 :prefix '(:le))
   (two-arg-f32.8>      #:vcmpps       (u32.8) (f32.8 f32.8) :cost 4 :prefix '(:gt))
   (two-arg-f32.8>=     #:vcmpps       (u32.8) (f32.8 f32.8) :cost 4 :prefix '(:ge))
   (f32.8-horizontal-and nil           (f32)   (f32.8)       :cost 5 :encoding :fake-vop)
   (f32.8-horizontal-or  nil           (f32)   (f32.8)       :cost 5 :encoding :fake-vop)
   (f32.8-horizontal-xor nil           (f32)   (f32.8)       :cost 5 :encoding :fake-vop)
   (f32.8-horizontal-max nil           (f32)   (f32.8)       :cost 5 :encoding :fake-vop)
   (f32.8-horizontal-min nil           (f32)   (f32.8)       :cost 5 :encoding :fake-vop)
   (f32.8-horizontal+    nil           (f32)   (f32.8)       :cost 5 :encoding :fake-vop)
   (f32.8-horizontal*    nil           (f32)   (f32.8)       :cost 5 :encoding :fake-vop)
   (f32.8-dupeven       #:vmovsldup    (f32.8) (f32.8)       :cost 1)
   (f32.8-dupodd        #:vmovshdup    (f32.8) (f32.8)       :cost 1)
   (f32.8-hadd          #:vhaddps      (f32.8) (f32.8 f32.8) :cost 6)
   (f32.8-hsub          #:vhsubps      (f32.8) (f32.8 f32.8) :cost 6)
   (f32.8-reciprocal    #:vrcpps       (f32.8) (f32.8)       :cost 5)
   (f32.8-rsqrt         #:vrsqrtps     (f32.8) (f32.8)       :cost 5)
   (f32.8-sqrt          #:vsqrtps      (f32.8) (f32.8)       :cost 15)
   (f32.8-unpackhi      #:vunpckhps    (f32.8) (f32.8 f32.8) :cost 1)
   (f32.8-unpacklo      #:vunpcklps    (f32.8) (f32.8 f32.8) :cost 1)
   (f32.8-movemask      #:vmovmskps    (u8)    (f32.8)       :cost 1)
   (f32.8-%round        #:vroundps     (f32.8) (f32.8 imm3)  :cost 2)
   (f32.8-permute       #:vpermilps    (f32.8) (f32.8 imm8)  :cost 1)
   (f32.8-permute128    #:vperm2f128   (f32.8) (f32.8 f32.8 imm8) :cost 1)
   (f32.8-shuffle       #:vshufps      (f32.8) (f32.8 f32.8 imm8) :cost 1)
   (f32.4-from-f32.8    #:vextractf128 (f32.4) (f32.8 imm1)  :cost 1)
   (f32.8-insert-f32.4  #:vinsertf128  (f32.8) (f32.8 f32.4 imm1) :cost 1)
   (f32.8-movemask      #:vmovmskps    (u8)    (f32.8)       :cost 1)
   ;; f64.4
   (f64.4!-from-f64     #:vmovupd      (f64.4) (f64)         :cost 1 :encoding :move)
   (f64.4!-from-p128    #:vmovupd      (f64.4) (p128)        :cost 1 :encoding :move :always-translatable nil)
   (f64.4!-from-p256    #:vmovupd      (f64.4) (p256)        :cost 1 :encoding :move :always-translatable nil)
   (make-f64.4          nil            (f64.4) (f64 f64 f64 f64) :cost 1 :encoding :fake-vop)
   (f64.4-values        nil            (f64 f64 f64 f64) (f64.4) :cost 1 :encoding :fake-vop)
   (f64.4-broadcast     #:vbroadcastsd (f64.4) (f64)         :cost 1)
   (f64.4-from-f32.4    #:vcvtps2pd    (f64.4) (f32.4)       :cost 5)
   (f64.4-from-u32.4    #:vcvtdq2pd    (f64.4) (u32.4)       :cost 5)
   (f64.4-from-s32.4    #:vcvtdq2pd    (f64.4) (s32.4)       :cost 5)
   (f64.4-blend         #:vblendvpd    (f64.4) (f64.4 f64.4 u64.4) :cost 1)
   (f64.4-blendc        #:vblendpd     (f64.4) (f64.4 f64.4 imm4) :cost 1)
   (two-arg-f64.4-and   #:vandpd       (f64.4) (f64.4 f64.4) :cost 1 :associative t)
   (two-arg-f64.4-or    #:vorpd        (f64.4) (f64.4 f64.4) :cost 1 :associative t)
   (two-arg-f64.4-xor   #:vxorpd       (f64.4) (f64.4 f64.4) :cost 1 :associative t)
   (two-arg-f64.4-max   #:vmaxpd       (f64.4) (f64.4 f64.4) :cost 3 :associative t)
   (two-arg-f64.4-min   #:vminpd       (f64.4) (f64.4 f64.4) :cost 3 :associative t)
   (two-arg-f64.4+      #:vaddpd       (f64.4) (f64.4 f64.4) :cost 2 :associative t)
   (two-arg-f64.4-      #:vsubpd       (f64.4) (f64.4 f64.4) :cost 2)
   (two-arg-f64.4*      #:vmulpd       (f64.4) (f64.4 f64.4) :cost 2 :associative t)
   (two-arg-f64.4/      #:vdivpd       (f64.4) (f64.4 f64.4) :cost 8)
   (two-arg-f64.4=      #:vcmppd       (u64.4) (f64.4 f64.4) :cost 4 :prefix '(:eq) :associative t)
   (two-arg-f64.4/=     #:vcmppd       (u64.4) (f64.4 f64.4) :cost 4 :prefix '(:neq) :associative t)
   (two-arg-f64.4<      #:vcmppd       (u64.4) (f64.4 f64.4) :cost 4 :prefix '(:lt))
   (two-arg-f64.4<=     #:vcmppd       (u64.4) (f64.4 f64.4) :cost 4 :prefix '(:le))
   (two-arg-f64.4>      #:vcmppd       (u64.4) (f64.4 f64.4) :cost 4 :prefix '(:gt))
   (two-arg-f64.4>=     #:vcmppd       (u64.4) (f64.4 f64.4) :cost 4 :prefix '(:ge))
   (f64.4-horizontal-and nil           (f64)   (f64.4)       :cost 3 :encoding :fake-vop)
   (f64.4-horizontal-or  nil           (f64)   (f64.4)       :cost 3 :encoding :fake-vop)
   (f64.4-horizontal-xor nil           (f64)   (f64.4)       :cost 3 :encoding :fake-vop)
   (f64.4-horizontal-max nil           (f64)   (f64.4)       :cost 3 :encoding :fake-vop)
   (f64.4-horizontal-min nil           (f64)   (f64.4)       :cost 3 :encoding :fake-vop)
   (f64.4-horizontal+    nil           (f64)   (f64.4)       :cost 3 :encoding :fake-vop)
   (f64.4-horizontal*    nil           (f64)   (f64.4)       :cost 3 :encoding :fake-vop)
   (f64.4-andc1         #:vandnpd      (f64.4) (f64.4 f64.4) :cost 1)
   (f64.4-not           nil            (f64.4) (f64.4)       :cost 1 :encoding :fake-vop)
   (f64.4-dupeven       #:vmovddup     (f64.4) (f64.4)       :cost 1)
   (f64.4-hadd          #:vhaddpd      (f64.4) (f64.4 f64.4) :cost 6)
   (f64.4-hsub          #:vhsubpd      (f64.4) (f64.4 f64.4) :cost 6)
   (f64.4-sqrt          #:vsqrtpd      (f64.4) (f64.4)       :cost 20)
   (f64.4-unpackhi      #:vunpckhpd    (f64.4) (f64.4 f64.4) :cost 1)
   (f64.4-unpacklo      #:vunpcklpd    (f64.4) (f64.4 f64.4) :cost 1)
   (f64.4-movemask      #:vmovmskpd    (u4)    (f64.4)       :cost 1)
   (f64.4-%round        #:vroundpd     (f64.4) (f64.4 imm3)  :cost 2)
   (f64.4-permute       #:vpermilpd    (f64.4) (f64.4 imm4)  :cost 1)
   (f64.4-permute128    #:vperm2f128   (f64.4) (f64.4 f64.4 imm8) :cost 1)
   (f64.4-shuffle       #:vshufpd      (f64.4) (f64.4 f64.4 imm4) :cost 1)
   (f64.4-reverse       nil            (f64.4) (f64.4)       :cost 2 :encoding :fake-vop)
   (f64.2-from-f64.4    #:vextractf128 (f64.2) (f64.4 imm1)  :cost 1)
   (f64.4-insert-f64.2  #:vinsertf128  (f64.4) (f64.4 f64.2 imm1) :cost 1)
   (f64.4-set128        #:vinsertf128  (f64.4) (f64.2 f64.2 imm1) :cost 1)
   (f64.4-movemask      #:vmovmskpd    (u4)    (f64.4)       :cost 1)
   ;; u8.16
   (u8.16!-from-u8      #:movq         (u8.16) (u8)          :cost 1)
   (u8.16!-from-p128    #:vmovdqu      (u8.16) (p128)        :cost 1 :encoding :move :always-translatable nil)
   (u8.16!-from-p256    #:vextractf128 (u8.16) (p256)        :cost 1 :suffix '(0) :always-translatable nil)
   (make-u8.16          nil            (u8.16) (u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8) :cost 1 :encoding :fake-vop)
   (u8.16-values        nil            (u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8) (u8.16) :cost 1 :encoding :fake-vop)
   (u8.16-broadcast     nil            (u8.16) (u8)          :cost 1 :encoding :fake-vop)
   (u8.16-blend         #:vpblendvb    (u8.16) (u8.16 u8.16 u8.16) :cost 1)
   (two-arg-u8.16-and   #:vpand        (u8.16) (u8.16 u8.16) :cost 1 :associative t)
   (two-arg-u8.16-or    #:vpor         (u8.16) (u8.16 u8.16) :cost 1 :associative t)
   (two-arg-u8.16-xor   #:vpxor        (u8.16) (u8.16 u8.16) :cost 1 :associative t)
   (u8.16-andc1         #:vpandn       (u8.16) (u8.16 u8.16) :cost 1)
   (u8.16-not           nil            (u8.16) (u8.16)       :cost 1 :encoding :fake-vop)
   (two-arg-u8.16+      #:vpaddb       (u8.16) (u8.16 u8.16) :cost 2 :associative t)
   (two-arg-u8.16-      #:vpsubb       (u8.16) (u8.16 u8.16) :cost 2)
   (two-arg-u8.16=      #:vpcmpeqb     (u8.16) (u8.16 u8.16) :cost 1 :associative t)
   (two-arg-u8.16/=     nil            (u8.16) (u8.16 u8.16) :cost 2 :associative t :encoding :fake-vop)
   (two-arg-u8.16>~     #:vpcmpgtb     (u8.16) (u8.16 u8.16) :cost 1)
   (two-arg-u8.16>      nil            (u8.16) (u8.16 u8.16) :cost 1 :encoding :fake-vop)
   (two-arg-u8.16<      nil            (u8.16) (u8.16 u8.16) :cost 1 :encoding :fake-vop)
   (two-arg-u8.16>=     nil            (u8.16) (u8.16 u8.16) :cost 2 :encoding :fake-vop)
   (two-arg-u8.16<=     nil            (u8.16) (u8.16 u8.16) :cost 2 :encoding :fake-vop)
   (u8.16-unpackhi      #:vpunpckhbw   (u8.16) (u8.16 u8.16) :cost 1)
   (u8.16-unpacklo      #:vpunpcklbw   (u8.16) (u8.16 u8.16) :cost 1)
   (u8.16-movemask      #:vpmovmskb    (u16)   (u8.16)       :cost 1)
   (u8.16-shuffle       #:vpshufb      (u8.16) (u8.16 u8.16) :cost 1) ;; TODO missing imm
   ;; u16.8
   (u16.8!-from-u16     #:movq         (u16.8) (u16)         :cost 1)
   (u16.8!-from-p128    #:vmovdqu      (u16.8) (p128)        :cost 1 :encoding :move :always-translatable nil)
   (u16.8!-from-p256    #:vextractf128 (u16.8) (p256)        :cost 1 :suffix '(0) :always-translatable nil)
   (make-u16.8          nil            (u16.8) (u16 u16 u16 u16 u16 u16 u16 u16) :cost 1 :encoding :fake-vop)
   (u16.8-values        nil            (u16 u16 u16 u16 u16 u16 u16 u16) (u16.8) :cost 1 :encoding :fake-vop)
   (u16.8-broadcast     nil            (u16.8) (u16)         :cost 1 :encoding :fake-vop)
   (u16.8-blend         #:vpblendvb    (u16.8) (u16.8 u16.8 u16.8) :cost 1)
   (two-arg-u16.8-and   #:vpand        (u16.8) (u16.8 u16.8) :cost 1 :associative t)
   (two-arg-u16.8-or    #:vpor         (u16.8) (u16.8 u16.8) :cost 1 :associative t)
   (two-arg-u16.8-xor   #:vpxor        (u16.8) (u16.8 u16.8) :cost 1 :associative t)
   (u16.8-andc1         #:vpandn       (u16.8) (u16.8 u16.8) :cost 1)
   (u16.8-not           nil            (u16.8) (u16.8)       :cost 1 :encoding :fake-vop)
   (two-arg-u16.8+      #:vpaddw       (u16.8) (u16.8 u16.8) :cost 2 :associative t)
   (two-arg-u16.8-      #:vpsubw       (u16.8) (u16.8 u16.8) :cost 2)
   (two-arg-u16.8=      #:vpcmpeqw     (u16.8) (u16.8 u16.8) :cost 1 :associative t)
   (two-arg-u16.8/=     nil            (u16.8) (u16.8 u16.8) :cost 2 :associative t :encoding :fake-vop)
   (two-arg-u16.8>~     #:vpcmpgtw     (u16.8) (u16.8 u16.8) :cost 1)
   (two-arg-u16.8>      nil            (u16.8) (u16.8 u16.8) :cost 1 :encoding :fake-vop)
   (two-arg-u16.8<      nil            (u16.8) (u16.8 u16.8) :cost 1 :encoding :fake-vop)
   (two-arg-u16.8>=     nil            (u16.8) (u16.8 u16.8) :cost 2 :encoding :fake-vop)
   (two-arg-u16.8<=     nil            (u16.8) (u16.8 u16.8) :cost 2 :encoding :fake-vop)
   (u16.8-shiftl        #:vpsllw       (u16.8) (u16.8 u16.8) :cost 1)
   (u16.8-shiftr        #:vpsrlw       (u16.8) (u16.8 u16.8) :cost 1)
   (u16.8-unpackhi      #:vpunpckhwd   (u16.8) (u16.8 u16.8) :cost 1)
   (u16.8-unpacklo      #:vpunpcklwd   (u16.8) (u16.8 u16.8) :cost 1)
   (u16.8-movemask      nil            (u8)    (u16.8)       :cost 1 :encoding :fake-vop)
   (u16.8-shufflehi     #:vpshufhw     (u16.8) (u16.8 imm8)  :cost 1)
   (u16.8-shufflelo     #:vpshuflw     (u16.8) (u16.8 imm8)  :cost 1)
   ;; u32.4
   (u32.4!-from-u32     #:movq         (u32.4) (u32)         :cost 1)
   (u32.4!-from-p128    #:vmovdqu      (u32.4) (p128)        :cost 1 :encoding :move :always-translatable nil)
   (u32.4!-from-p256    #:vextractf128 (u32.4) (p256)        :cost 1 :suffix '(0) :always-translatable nil)
   (make-u32.4          nil            (u32.4) (u32 u32 u32 u32) :cost 1 :encoding :fake-vop)
   (u32.4-values        nil            (u32 u32 u32 u32) (u32.4) :cost 1 :encoding :fake-vop)
   (u32.4-broadcast     nil            (u32.4) (u32)         :cost 1 :encoding :fake-vop)
   (u32.4-blend         #:vpblendvb    (u32.4) (u32.4 u32.4 u32.4) :cost 1)
   (two-arg-u32.4-and   #:vpand        (u32.4) (u32.4 u32.4) :cost 1 :associative t)
   (two-arg-u32.4-or    #:vpor         (u32.4) (u32.4 u32.4) :cost 1 :associative t)
   (two-arg-u32.4-xor   #:vpxor        (u32.4) (u32.4 u32.4) :cost 1 :associative t)
   (u32.4-andc1         #:vpandn       (u32.4) (u32.4 u32.4) :cost 1)
   (u32.4-not           nil            (u32.4) (u32.4)       :cost 1 :encoding :fake-vop)
   (two-arg-u32.4+      #:vpaddd       (u32.4) (u32.4 u32.4) :cost 2 :associative t)
   (two-arg-u32.4-      #:vpsubd       (u32.4) (u32.4 u32.4) :cost 2)
   (two-arg-u32.4=      #:vpcmpeqd     (u32.4) (u32.4 u32.4) :cost 1 :associative t)
   (two-arg-u32.4/=     nil            (u32.4) (u32.4 u32.4) :cost 2 :associative t :encoding :fake-vop)
   (two-arg-u32.4>~     #:vpcmpgtd     (u32.4) (u32.4 u32.4) :cost 1)
   (two-arg-u32.4>      nil            (u32.4) (u32.4 u32.4) :cost 1 :encoding :fake-vop)
   (two-arg-u32.4<      nil            (u32.4) (u32.4 u32.4) :cost 1 :encoding :fake-vop)
   (two-arg-u32.4>=     nil            (u32.4) (u32.4 u32.4) :cost 2 :encoding :fake-vop)
   (two-arg-u32.4<=     nil            (u32.4) (u32.4 u32.4) :cost 2 :encoding :fake-vop)
   (u32.4-unpackhi      #:vpunpckhdq   (u32.4) (u32.4 u32.4) :cost 1)
   (u32.4-unpacklo      #:vpunpckldq   (u32.4) (u32.4 u32.4) :cost 1)
   (u32.4-movemask      #:vmovmskps    (u4)    (u32.4)       :cost 1)
   (u32.4-permute       #:vpermilps    (u32.4) (u32.4 imm8)  :cost 1)
   ;; u64.2 -- packs of two u64 lanes (128 bit).
   ;; Rows read (name mnemonic (result-types) (argument-types) . options).
   ;; Every row with a NIL mnemonic in this group uses :encoding :fake-vop.
   ;; Casts, constructors and accessors.
   (u64.2!-from-u64     #:movq         (u64.2) (u64)         :cost 1)
   (u64.2!-from-p128    #:vmovdqu      (u64.2) (p128)        :cost 1 :encoding :move :always-translatable nil)
   (u64.2!-from-p256    #:vextractf128 (u64.2) (p256)        :cost 1 :suffix '(0) :always-translatable nil)
   (make-u64.2          nil            (u64.2) (u64 u64)     :cost 1 :encoding :fake-vop)
   (u64.2-values        nil            (u64 u64) (u64.2)     :cost 1 :encoding :fake-vop)
   (u64.2-broadcast     nil            (u64.2) (u64)         :cost 1 :encoding :fake-vop)
   (u64.2-from-u64      #:movq         (u64.2) (u64)         :cost 1)
   (u64.2-blend         #:vpblendvb    (u64.2) (u64.2 u64.2 u64.2) :cost 1)
   ;; Bitwise logic.
   (two-arg-u64.2-and   #:vpand        (u64.2) (u64.2 u64.2) :cost 1 :associative t)
   (two-arg-u64.2-or    #:vpor         (u64.2) (u64.2 u64.2) :cost 1 :associative t)
   (two-arg-u64.2-xor   #:vpxor        (u64.2) (u64.2 u64.2) :cost 1 :associative t)
   (u64.2-andc1         #:vpandn       (u64.2) (u64.2 u64.2) :cost 1)
   (u64.2-not           nil            (u64.2) (u64.2)       :cost 1 :encoding :fake-vop)
   ;; Arithmetic.
   (two-arg-u64.2+      #:vpaddq       (u64.2) (u64.2 u64.2) :cost 2 :associative t)
   (two-arg-u64.2-      #:vpsubq       (u64.2) (u64.2 u64.2) :cost 2)
   ;; Comparisons.  Only = and >~ name a machine instruction; the other
   ;; orderings are fake VOPs.
   ;; NOTE(review): vpcmpgtq compares signed lanes, so >, <, >= and <= are
   ;; presumably derived from >~ elsewhere -- confirm.
   (two-arg-u64.2=      #:vpcmpeqq     (u64.2) (u64.2 u64.2) :cost 1 :associative t)
   (two-arg-u64.2/=     nil            (u64.2) (u64.2 u64.2) :cost 2 :associative t :encoding :fake-vop)
   (two-arg-u64.2>~     #:vpcmpgtq     (u64.2) (u64.2 u64.2) :cost 1)
   (two-arg-u64.2>      nil            (u64.2) (u64.2 u64.2) :cost 1 :encoding :fake-vop)
   (two-arg-u64.2<      nil            (u64.2) (u64.2 u64.2) :cost 1 :encoding :fake-vop)
   (two-arg-u64.2>=     nil            (u64.2) (u64.2 u64.2) :cost 2 :encoding :fake-vop)
   (two-arg-u64.2<=     nil            (u64.2) (u64.2 u64.2) :cost 2 :encoding :fake-vop)
   ;; Lane rearrangement and mask extraction.
   (u64.2-unpackhi      #:vpunpckhqdq  (u64.2) (u64.2 u64.2) :cost 1)
   (u64.2-unpacklo      #:vpunpcklqdq  (u64.2) (u64.2 u64.2) :cost 1)
   (u64.2-movemask      #:vmovmskpd    (u2)    (u64.2)       :cost 1)
   (u64.2-permute       #:vpermilpd    (u64.2) (u64.2 imm8)  :cost 1)
   ;; u8.32 -- packs of thirty-two u8 lanes (256 bit).
   ;; Rows read (name mnemonic (result-types) (argument-types) . options).
   ;; Casts, constructor, accessor and broadcast.
   (u8.32!-from-u8      #:movq         (u8.32) (u8) :cost 1)
   (u8.32!-from-p128    #:vmovdqu      (u8.32) (p128) :cost 1 :encoding :move :always-translatable nil)
   (u8.32!-from-p256    #:vmovdqu      (u8.32) (p256) :cost 1 :encoding :move :always-translatable nil)
   (make-u8.32          nil            (u8.32) (u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8) :cost 1 :encoding :fake-vop)
   (u8.32-values        nil            (u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8 u8) (u8.32) :cost 1 :encoding :fake-vop)
   (u8.32-broadcast     nil            (u8.32) (u8) :cost 1 :encoding :fake-vop)
   ;; Moving a 128-bit u8.16 out of / into a u8.32; the half is chosen by imm1.
   (u8.16-from-u8.32    #:vextractf128 (u8.16) (u8.32 imm1) :cost 1)
   (u8.32-insert-u8.16  #:vinsertf128  (u8.32) (u8.32 u8.16 imm1) :cost 1)
   ;; u16.16 -- packs of sixteen u16 lanes (256 bit).
   ;; Rows read (name mnemonic (result-types) (argument-types) . options).
   ;; Casts, constructor, accessor and broadcast.
   (u16.16!-from-u16    #:movq         (u16.16) (u16) :cost 1)
   (u16.16!-from-p128   #:vmovdqu      (u16.16) (p128) :cost 1 :encoding :move :always-translatable nil)
   (u16.16!-from-p256   #:vmovdqu      (u16.16) (p256) :cost 1 :encoding :move :always-translatable nil)
   (make-u16.16         nil            (u16.16) (u16 u16 u16 u16 u16 u16 u16 u16 u16 u16 u16 u16 u16 u16 u16 u16) :cost 1 :encoding :fake-vop)
   (u16.16-values       nil            (u16 u16 u16 u16 u16 u16 u16 u16 u16 u16 u16 u16 u16 u16 u16 u16) (u16.16) :cost 1 :encoding :fake-vop)
   (u16.16-broadcast    nil            (u16.16) (u16) :cost 1 :encoding :fake-vop)
   ;; Moving a 128-bit u16.8 out of / into a u16.16; the half is chosen by imm1.
   (u16.8-from-u16.16   #:vextractf128 (u16.8)  (u16.16 imm1) :cost 1)
   (u16.16-insert-u16.8 #:vinsertf128  (u16.16) (u16.16 u16.8 imm1) :cost 1)
   ;; u32.8 -- packs of eight u32 lanes (256 bit).
   ;; Rows read (name mnemonic (result-types) (argument-types) . options).
   ;; Casts, constructor, accessor and broadcast.
   (u32.8!-from-u32     #:movq         (u32.8) (u32) :cost 1)
   (u32.8!-from-p128    #:vmovdqu      (u32.8) (p128) :cost 1 :encoding :move :always-translatable nil)
   (u32.8!-from-p256    #:vmovdqu      (u32.8) (p256) :cost 1 :encoding :move :always-translatable nil)
   (make-u32.8          nil            (u32.8) (u32 u32 u32 u32 u32 u32 u32 u32) :cost 1 :encoding :fake-vop)
   ;; The only bitwise operation declared for u32.8 in this group.
   (two-arg-u32.8-and   #:vandps       (u32.8) (u32.8 u32.8) :cost 1 :associative t)
   (u32.8-values        nil            (u32 u32 u32 u32 u32 u32 u32 u32) (u32.8) :cost 1 :encoding :fake-vop)
   (u32.8-broadcast     nil            (u32.8) (u32) :cost 1 :encoding :fake-vop)
   ;; The pack argument is the full 256-bit u32.8, matching the result type
   ;; (the same shape as u64.4-permute).  Declaring it as u32.4 would make the
   ;; function reject genuine u32.8 inputs.
   (u32.8-permute       #:vpermilps    (u32.8) (u32.8 imm8) :cost 1)
   ;; Moving a 128-bit u32.4 out of / into a u32.8; the half is chosen by imm1.
   (u32.4-from-u32.8    #:vextractf128 (u32.4) (u32.8 imm1) :cost 1)
   (u32.8-insert-u32.4  #:vinsertf128  (u32.8) (u32.8 u32.4 imm1) :cost 1)
   ;; u64.4 -- packs of four u64 lanes (256 bit).
   ;; Rows read (name mnemonic (result-types) (argument-types) . options).
   ;; Casts, constructor, accessor and broadcast.
   (u64.4!-from-u64     #:movq         (u64.4) (u64) :cost 1)
   (u64.4!-from-p128    #:vmovdqu      (u64.4) (p128) :cost 1 :encoding :move :always-translatable nil)
   (u64.4!-from-p256    #:vmovdqu      (u64.4) (p256) :cost 1 :encoding :move :always-translatable nil)
   (make-u64.4          nil            (u64.4) (u64 u64 u64 u64) :cost 1 :encoding :fake-vop)
   ;; The only bitwise operation declared for u64.4 in this group.
   (two-arg-u64.4-and   #:vandpd       (u64.4) (u64.4 u64.4) :cost 1 :associative t)
   (u64.4-values        nil            (u64 u64 u64 u64) (u64.4) :cost 1 :encoding :fake-vop)
   (u64.4-broadcast     nil            (u64.4) (u64) :cost 1 :encoding :fake-vop)
   (u64.4-permute       #:vpermilpd    (u64.4) (u64.4 imm8) :cost 1)
   ;; Moving a 128-bit u64.2 out of / into a u64.4; the half is chosen by imm1.
   (u64.2-from-u64.4    #:vextractf128 (u64.2) (u64.4 imm1) :cost 1)
   (u64.4-insert-u64.2  #:vinsertf128  (u64.4) (u64.4 u64.2 imm1) :cost 1)
   ;; s8.16 -- packs of sixteen s8 lanes (128 bit).
   ;; Rows read (name mnemonic (result-types) (argument-types) . options).
   ;; Casts, constructor, accessor and broadcast.
   (s8.16!-from-s8      nil            (s8.16) (s8)          :cost 1 :encoding :fake-vop)
   (s8.16!-from-p128    #:vmovdqu      (s8.16) (p128)        :cost 1 :encoding :move :always-translatable nil)
   (s8.16!-from-p256    #:vextractf128 (s8.16) (p256)        :cost 1 :suffix '(0) :always-translatable nil)
   (make-s8.16          nil            (s8.16) (s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8) :cost 1 :encoding :fake-vop)
   (s8.16-values        nil            (s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8) (s8.16) :cost 1 :encoding :fake-vop)
   (s8.16-broadcast     nil            (s8.16)    (s8)       :cost 1 :encoding :fake-vop)
   ;; The third blend argument is an unsigned u8.16 pack (presumably the
   ;; selection mask produced by a comparison).
   (s8.16-blend         #:vpblendvb    (s8.16) (s8.16 s8.16 u8.16) :cost 1)
   ;; Bitwise logic.
   (two-arg-s8.16-and   #:vpand        (s8.16) (s8.16 s8.16) :cost 1 :associative t)
   (two-arg-s8.16-or    #:vpor         (s8.16) (s8.16 s8.16) :cost 1 :associative t)
   (two-arg-s8.16-xor   #:vpxor        (s8.16) (s8.16 s8.16) :cost 1 :associative t)
   (s8.16-andc1         #:vpandn       (s8.16) (s8.16 s8.16) :cost 1)
   (s8.16-not           nil            (s8.16) (s8.16)       :cost 1 :encoding :fake-vop)
   ;; Arithmetic.
   (two-arg-s8.16+      #:vpaddb       (s8.16) (s8.16 s8.16) :cost 2 :associative t)
   (two-arg-s8.16-      #:vpsubb       (s8.16) (s8.16 s8.16) :cost 2)
   ;; Comparisons return unsigned u8.16 packs.  Only = and > name a machine
   ;; instruction; the remaining ones are fake VOPs.
   (two-arg-s8.16=      #:vpcmpeqb     (u8.16) (s8.16 s8.16) :cost 1 :associative t)
   (two-arg-s8.16/=     nil            (u8.16) (s8.16 s8.16) :cost 2 :associative t :encoding :fake-vop)
   (two-arg-s8.16>      #:vpcmpgtb     (u8.16) (s8.16 s8.16) :cost 1)
   (two-arg-s8.16<      nil            (u8.16) (s8.16 s8.16) :cost 1 :encoding :fake-vop)
   (two-arg-s8.16>=     nil            (u8.16) (s8.16 s8.16) :cost 2 :encoding :fake-vop)
   (two-arg-s8.16<=     nil            (u8.16) (s8.16 s8.16) :cost 2 :encoding :fake-vop)
   ;; Lane rearrangement and mask extraction.
   (s8.16-unpackhi      #:vpunpckhbw   (s8.16) (s8.16 s8.16) :cost 1)
   (s8.16-unpacklo      #:vpunpcklbw   (s8.16) (s8.16 s8.16) :cost 1)
   (s8.16-movemask      #:vpmovmskb    (u16)   (s8.16)       :cost 1)
   (s8.16-shuffle       #:vpshufb      (s8.16) (s8.16 u8.16) :cost 1)
   ;; s16.8 -- packs of eight s16 lanes (128 bit).
   ;; Rows read (name mnemonic (result-types) (argument-types) . options).
   ;; Each function is declared exactly once: s16.8-shiftl / s16.8-shiftr use
   ;; the word-sized shifts vpsllw / vpsrlw (the quadword forms vpsllq / vpsrlq
   ;; would shift bits across 16-bit lane boundaries), and
   ;; two-arg-s16.8-mullo has a single row.
   ;; Casts, constructor, accessor and broadcast.
   (s16.8!-from-s16     nil            (s16.8) (s16)         :cost 1 :encoding :fake-vop)
   (s16.8!-from-p128    #:vmovdqu      (s16.8) (p128)        :cost 1 :encoding :move :always-translatable nil)
   (s16.8!-from-p256    #:vextractf128 (s16.8) (p256)        :cost 1 :suffix '(0) :always-translatable nil)
   (make-s16.8          nil            (s16.8) (s16 s16 s16 s16 s16 s16 s16 s16) :cost 1 :encoding :fake-vop)
   (s16.8-values        nil            (s16 s16 s16 s16 s16 s16 s16 s16) (s16.8) :cost 1 :encoding :fake-vop)
   (s16.8-broadcast     nil            (s16.8) (s16)         :cost 1 :encoding :fake-vop)
   (s16.8-blend         #:vpblendvb    (s16.8) (s16.8 s16.8 u16.8) :cost 1)
   ;; Bitwise logic.
   (two-arg-s16.8-and   #:vpand        (s16.8) (s16.8 s16.8) :cost 1 :associative t)
   (two-arg-s16.8-or    #:vpor         (s16.8) (s16.8 s16.8) :cost 1 :associative t)
   (two-arg-s16.8-xor   #:vpxor        (s16.8) (s16.8 s16.8) :cost 1 :associative t)
   (s16.8-andc1         #:vpandn       (s16.8) (s16.8 s16.8) :cost 1)
   (s16.8-not           nil            (s16.8) (s16.8)       :cost 1 :encoding :fake-vop)
   ;; Arithmetic.
   (two-arg-s16.8+      #:vpaddw       (s16.8) (s16.8 s16.8) :cost 2 :associative t)
   (two-arg-s16.8-      #:vpsubw       (s16.8) (s16.8 s16.8) :cost 2)
   (two-arg-s16.8-mullo #:vpmullw      (s16.8) (s16.8 s16.8) :cost 2 :associative t)
   ;; Comparisons return unsigned u16.8 packs.
   (two-arg-s16.8=      #:vpcmpeqw     (u16.8) (s16.8 s16.8) :cost 1 :associative t)
   (two-arg-s16.8/=     nil            (u16.8) (s16.8 s16.8) :cost 2 :associative t :encoding :fake-vop)
   (two-arg-s16.8>      #:vpcmpgtw     (u16.8) (s16.8 s16.8) :cost 1)
   (two-arg-s16.8<      nil            (u16.8) (s16.8 s16.8) :cost 1 :encoding :fake-vop)
   (two-arg-s16.8>=     nil            (u16.8) (s16.8 s16.8) :cost 2 :encoding :fake-vop)
   (two-arg-s16.8<=     nil            (u16.8) (s16.8 s16.8) :cost 2 :encoding :fake-vop)
   ;; Shifts on 16-bit lanes.
   (s16.8-shiftl        #:vpsllw       (s16.8) (s16.8 s16.8) :cost 1)
   (s16.8-shiftr        #:vpsrlw       (s16.8) (s16.8 s16.8) :cost 1)
   ;; Lane rearrangement and mask extraction.
   (s16.8-unpackhi      #:vpunpckhwd   (s16.8) (s16.8 s16.8) :cost 1)
   (s16.8-unpacklo      #:vpunpcklwd   (s16.8) (s16.8 s16.8) :cost 1)
   (s16.8-movemask      nil            (u8)    (s16.8)       :cost 1 :encoding :fake-vop)
   (s16.8-shufflehi     #:vpshufhw     (s16.8) (s16.8 imm8)  :cost 1)
   (s16.8-shufflelo     #:vpshuflw     (s16.8) (s16.8 imm8)  :cost 1)
   ;; s32.4 -- packs of four s32 lanes (128 bit).
   ;; Rows read (name mnemonic (result-types) (argument-types) . options).
   ;; Casts, constructor, accessor and broadcast.
   (s32.4!-from-s32     nil            (s32.4) (s32)         :cost 1 :encoding :fake-vop)
   (s32.4!-from-p128    #:vmovdqu      (s32.4) (p128)        :cost 1 :encoding :move :always-translatable nil)
   (s32.4!-from-p256    #:vextractf128 (s32.4) (p256)        :cost 1 :suffix '(0) :always-translatable nil)
   (make-s32.4          nil            (s32.4) (s32 s32 s32 s32) :cost 1 :encoding :fake-vop)
   (s32.4-values        nil            (s32 s32 s32 s32) (s32.4) :cost 1 :encoding :fake-vop)
   (s32.4-broadcast     nil            (s32.4) (s32)         :cost 1 :encoding :fake-vop)
   (s32.4-blend         #:vpblendvb    (s32.4) (s32.4 s32.4 u32.4) :cost 1)
   ;; Conversions from floating-point packs (f64.4 and f32.4 both yield s32.4).
   (s32.4-from-f64.4    #:vcvtpd2dq    (s32.4) (f64.4)       :cost 5)
   (s32.4-from-f32.4    #:vcvtps2dq    (s32.4) (f32.4)       :cost 5)
   ;; Bitwise logic.
   (two-arg-s32.4-and   #:vpand        (s32.4) (s32.4 s32.4) :cost 1 :associative t)
   (two-arg-s32.4-or    #:vpor         (s32.4) (s32.4 s32.4) :cost 1 :associative t)
   (two-arg-s32.4-xor   #:vpxor        (s32.4) (s32.4 s32.4) :cost 1 :associative t)
   (s32.4-andc1         #:vpandn       (s32.4) (s32.4 s32.4) :cost 1)
   (s32.4-not           nil            (s32.4) (s32.4)       :cost 1 :encoding :fake-vop)
   ;; Arithmetic.
   (two-arg-s32.4+      #:vpaddd       (s32.4) (s32.4 s32.4) :cost 2 :associative t)
   (two-arg-s32.4-      #:vpsubd       (s32.4) (s32.4 s32.4) :cost 2)
   (two-arg-s32.4-mullo #:vpmulld      (s32.4) (s32.4 s32.4) :cost 2 :associative t)
   ;; Comparisons return unsigned u32.4 packs.
   (two-arg-s32.4=      #:vpcmpeqd     (u32.4) (s32.4 s32.4) :cost 1 :associative t)
   (two-arg-s32.4/=     nil            (u32.4) (s32.4 s32.4) :cost 2 :associative t :encoding :fake-vop)
   (two-arg-s32.4>      #:vpcmpgtd     (u32.4) (s32.4 s32.4) :cost 1)
   (two-arg-s32.4<      nil            (u32.4) (s32.4 s32.4) :cost 1 :encoding :fake-vop)
   (two-arg-s32.4>=     nil            (u32.4) (s32.4 s32.4) :cost 2 :encoding :fake-vop)
   (two-arg-s32.4<=     nil            (u32.4) (s32.4 s32.4) :cost 2 :encoding :fake-vop)
   ;; Lane rearrangement and mask extraction.
   (s32.4-unpackhi      #:vpunpckhdq   (s32.4) (s32.4 s32.4) :cost 1)
   (s32.4-unpacklo      #:vpunpckldq   (s32.4) (s32.4 s32.4) :cost 1)
   (s32.4-movemask      #:vmovmskps    (u4)    (s32.4)       :cost 1)
   (s32.4-permute       #:vpermilps    (s32.4) (s32.4 imm8)  :cost 1)
   ;; s64.2 -- packs of two s64 lanes (128 bit).
   ;; Rows read (name mnemonic (result-types) (argument-types) . options).
   ;; Casts, constructor, accessor and broadcast.
   (s64.2!-from-s64     nil            (s64.2) (s64)         :cost 1 :encoding :fake-vop)
   (s64.2!-from-p128    #:vmovdqu      (s64.2) (p128)        :cost 1 :encoding :move :always-translatable nil)
   (s64.2!-from-p256    #:vextractf128 (s64.2) (p256)        :cost 1 :suffix '(0) :always-translatable nil)
   (make-s64.2          nil            (s64.2) (s64 s64)     :cost 1 :encoding :fake-vop)
   (s64.2-values        nil            (s64 s64) (s64.2)     :cost 1 :encoding :fake-vop)
   (s64.2-broadcast     nil            (s64.2) (s64)         :cost 1 :encoding :fake-vop)
   (s64.2-blend         #:vpblendvb    (s64.2) (s64.2 s64.2 u64.2) :cost 1)
   ;; Bitwise logic.
   (two-arg-s64.2-and   #:vpand        (s64.2) (s64.2 s64.2) :cost 1 :associative t)
   (two-arg-s64.2-or    #:vpor         (s64.2) (s64.2 s64.2) :cost 1 :associative t)
   (two-arg-s64.2-xor   #:vpxor        (s64.2) (s64.2 s64.2) :cost 1 :associative t)
   (s64.2-andc1         #:vpandn       (s64.2) (s64.2 s64.2) :cost 1)
   (s64.2-not           nil            (s64.2) (s64.2)       :cost 1 :encoding :fake-vop)
   ;; Arithmetic and shifts on 64-bit lanes.
   (two-arg-s64.2+      #:vpaddq       (s64.2) (s64.2 s64.2) :cost 2 :associative t)
   (two-arg-s64.2-      #:vpsubq       (s64.2) (s64.2 s64.2) :cost 2)
   (s64.2-shiftl        #:vpsllq       (s64.2) (s64.2 s64.2) :cost 1)
   (s64.2-shiftr        #:vpsrlq       (s64.2) (s64.2 s64.2) :cost 1)
   ;; Comparisons return unsigned u64.2 packs.
   (two-arg-s64.2=      #:vpcmpeqq     (u64.2) (s64.2 s64.2) :cost 1 :associative t)
   (two-arg-s64.2/=     nil            (u64.2) (s64.2 s64.2) :cost 2 :associative t :encoding :fake-vop)
   (two-arg-s64.2>      #:vpcmpgtq     (u64.2) (s64.2 s64.2) :cost 1)
   (two-arg-s64.2<      nil            (u64.2) (s64.2 s64.2) :cost 1 :encoding :fake-vop)
   (two-arg-s64.2>=     nil            (u64.2) (s64.2 s64.2) :cost 2 :encoding :fake-vop)
   (two-arg-s64.2<=     nil            (u64.2) (s64.2 s64.2) :cost 2 :encoding :fake-vop)
   ;; Lane rearrangement and mask extraction.
   (s64.2-unpackhi      #:vpunpckhqdq  (s64.2) (s64.2 s64.2) :cost 1)
   (s64.2-unpacklo      #:vpunpcklqdq  (s64.2) (s64.2 s64.2) :cost 1)
   (s64.2-movemask      #:vmovmskpd    (u2)    (s64.2)       :cost 1)
   (s64.2-permute       #:vpermilpd    (s64.2) (s64.2 imm8)  :cost 1)
   ;; s8.32 -- packs of thirty-two s8 lanes (256 bit).
   ;; Rows read (name mnemonic (result-types) (argument-types) . options).
   ;; Casts, constructor, accessor and broadcast.
   (s8.32!-from-s8      nil            (s8.32) (s8) :cost 1 :encoding :fake-vop)
   (s8.32!-from-p128    #:vmovdqu      (s8.32) (p128) :cost 1 :encoding :move :always-translatable nil)
   (s8.32!-from-p256    #:vmovdqu      (s8.32) (p256) :cost 1 :encoding :move :always-translatable nil)
   (make-s8.32          nil            (s8.32) (s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8) :cost 1 :encoding :fake-vop)
   (s8.32-values        nil            (s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8 s8) (s8.32) :cost 1 :encoding :fake-vop)
   (s8.32-broadcast     nil            (s8.32) (s8) :cost 1 :encoding :fake-vop)
   ;; 128-bit half extraction / insertion (imm1) and two-source 128-bit
   ;; permutation (imm8).
   (s8.16-from-s8.32    #:vextractf128 (s8.16) (s8.32 imm1) :cost 1)
   (s8.32-insert-s8.16  #:vinsertf128  (s8.32) (s8.32 s8.16 imm1) :cost 1)
   (s8.32-permute128    #:vperm2f128   (s8.32) (s8.32 s8.32 imm8) :cost 1)
   ;; s16.16 -- packs of sixteen s16 lanes (256 bit).
   ;; Rows read (name mnemonic (result-types) (argument-types) . options).
   ;; Casts, constructor, accessor and broadcast.
   (s16.16!-from-s16    nil            (s16.16) (s16) :cost 1 :encoding :fake-vop)
   (s16.16!-from-p128   #:vmovdqu      (s16.16) (p128) :cost 1 :encoding :move :always-translatable nil)
   (s16.16!-from-p256   #:vmovdqu      (s16.16) (p256) :cost 1 :encoding :move :always-translatable nil)
   (make-s16.16         nil            (s16.16) (s16 s16 s16 s16 s16 s16 s16 s16 s16 s16 s16 s16 s16 s16 s16 s16) :cost 1 :encoding :fake-vop)
   (s16.16-values       nil            (s16 s16 s16 s16 s16 s16 s16 s16 s16 s16 s16 s16 s16 s16 s16 s16) (s16.16) :cost 1 :encoding :fake-vop)
   (s16.16-broadcast    nil            (s16.16) (s16) :cost 1 :encoding :fake-vop)
   ;; 128-bit half extraction / insertion (imm1) and two-source 128-bit
   ;; permutation (imm8).
   (s16.8-from-s16.16   #:vextractf128 (s16.8) (s16.16 imm1) :cost 1)
   (s16.16-insert-s16.8 #:vinsertf128  (s16.16) (s16.16 s16.8 imm1) :cost 1)
   (s16.16-permute128   #:vperm2f128   (s16.16) (s16.16 s16.16 imm8) :cost 1)
   ;; s32.8 -- packs of eight s32 lanes (256 bit).
   ;; Rows read (name mnemonic (result-types) (argument-types) . options).
   ;; Casts.
   (s32.8!-from-s32     nil            (s32.8) (s32) :cost 1 :encoding :fake-vop)
   (s32.8!-from-p128    #:vmovdqu      (s32.8) (p128) :cost 1 :encoding :move :always-translatable nil)
   (s32.8!-from-p256    #:vmovdqu      (s32.8) (p256) :cost 1 :encoding :move :always-translatable nil)
   ;; NOTE(review): vcvtpd2dq turns four doubles into four 32-bit integers, and
   ;; the s32.4 group already declares s32.4-from-f64.4 with this mnemonic.
   ;; The s32.8 result type here looks suspicious -- confirm before relying on
   ;; the upper four lanes.  Left unchanged to keep the exported interface.
   (s32.8-from-f64.4    #:vcvtpd2dq    (s32.8) (f64.4) :cost 5)
   ;; Constructor, accessor and broadcast.
   (make-s32.8          nil            (s32.8) (s32 s32 s32 s32 s32 s32 s32 s32) :cost 1 :encoding :fake-vop)
   (s32.8-values        nil            (s32 s32 s32 s32 s32 s32 s32 s32) (s32.8) :cost 1 :encoding :fake-vop)
   (s32.8-broadcast     nil            (s32.8) (s32) :cost 1 :encoding :fake-vop)
   (s32.8-from-f32.8    #:vcvtps2dq    (s32.8) (f32.8)       :cost 4)
   ;; The pack argument is the full 256-bit s32.8, matching the result type
   ;; (the same shape as s64.4-permute).  Declaring it as s32.4 would make the
   ;; function reject genuine s32.8 inputs.
   (s32.8-permute       #:vpermilps    (s32.8) (s32.8 imm8) :cost 1)
   ;; 128-bit half extraction / insertion (imm1) and two-source 128-bit
   ;; permutation (imm8).
   (s32.4-from-s32.8    #:vextractf128 (s32.4) (s32.8 imm1) :cost 1)
   (s32.8-insert-s32.4  #:vinsertf128  (s32.8) (s32.8 s32.4 imm1) :cost 1)
   (s32.8-permute128    #:vperm2f128   (s32.8) (s32.8 s32.8 imm8) :cost 1)
   ;; s64.4 -- packs of four s64 lanes (256 bit).
   ;; Rows read (name mnemonic (result-types) (argument-types) . options).
   ;; Casts, constructor, accessor and broadcast.
   (s64.4!-from-s64     nil            (s64.4) (s64) :cost 1 :encoding :fake-vop)
   (s64.4!-from-p128    #:vmovdqu      (s64.4) (p128) :cost 1 :encoding :move :always-translatable nil)
   (s64.4!-from-p256    #:vmovdqu      (s64.4) (p256) :cost 1 :encoding :move :always-translatable nil)
   (make-s64.4          nil            (s64.4) (s64 s64 s64 s64) :cost 1 :encoding :fake-vop)
   (s64.4-values        nil            (s64 s64 s64 s64) (s64.4) :cost 1 :encoding :fake-vop)
   (s64.4-broadcast     nil            (s64.4) (s64) :cost 1 :encoding :fake-vop)
   (s64.4-permute       #:vpermilpd    (s64.4) (s64.4 imm8) :cost 1)
   ;; 128-bit half extraction / insertion (imm1) and two-source 128-bit
   ;; permutation (imm8).  The final parenthesis below also closes the
   ;; enclosing instruction clause.
   (s64.2-from-s64.4    #:vextractf128 (s64.2) (s64.4 imm1) :cost 1)
   (s64.4-insert-s64.2  #:vinsertf128  (s64.4) (s64.4 s64.2 imm1) :cost 1)
   (s64.4-permute128    #:vperm2f128   (s64.4) (s64.4 s64.4 imm8) :cost 1))
  (:loads
   ;; Each row lists: load name, mnemonic, value type, vector type, array
   ;; type, and the names of the aref-style and row-major-aref-style readers.
   ;; String loads exist only when the :sb-unicode feature is present.
   #+sb-unicode
   (u32.4-load-from-string #:vmovdqu u32.4 charvec char-array u32.4-string-ref u32.4-row-major-string-ref)
   #+sb-unicode
   (u32.8-load-from-string #:vmovdqu u32.8 charvec char-array u32.8-string-ref u32.8-row-major-string-ref)
   ;; Scalar floating-point loads.
   (f32-load    #:vmovss  f32    f32vec f32-array f32-aref    f32-row-major-aref)
   (f64-load    #:vmovsd  f64    f64vec f64-array f64-aref    f64-row-major-aref)
   ;; Floating-point pack loads.
   (f32.4-load  #:vmovups f32.4  f32vec f32-array f32.4-aref  f32.4-row-major-aref)
   (f64.2-load  #:vmovupd f64.2  f64vec f64-array f64.2-aref  f64.2-row-major-aref)
   (f32.8-load  #:vmovups f32.8  f32vec f32-array f32.8-aref  f32.8-row-major-aref)
   (f64.4-load  #:vmovupd f64.4  f64vec f64-array f64.4-aref  f64.4-row-major-aref)
   ;; Unsigned integer pack loads.
   (u8.16-load  #:vmovdqu u8.16  u8vec  u8-array  u8.16-aref  u8.16-row-major-aref)
   (u16.8-load  #:vmovdqu u16.8  u16vec u16-array u16.8-aref  u16.8-row-major-aref)
   (u32.4-load  #:vmovdqu u32.4  u32vec u32-array u32.4-aref  u32.4-row-major-aref)
   (u64.2-load  #:vmovdqu u64.2  u64vec u64-array u64.2-aref  u64.2-row-major-aref)
   (u8.32-load  #:vmovdqu u8.32  u8vec  u8-array  u8.32-aref  u8.32-row-major-aref)
   (u16.16-load #:vmovdqu u16.16 u16vec u16-array u16.16-aref u16.16-row-major-aref)
   (u32.8-load  #:vmovdqu u32.8  u32vec u32-array u32.8-aref  u32.8-row-major-aref)
   (u64.4-load  #:vmovdqu u64.4  u64vec u64-array u64.4-aref  u64.4-row-major-aref)
   ;; Signed integer pack loads.
   (s8.16-load  #:vmovdqu s8.16  s8vec  s8-array  s8.16-aref  s8.16-row-major-aref)
   (s16.8-load  #:vmovdqu s16.8  s16vec s16-array s16.8-aref  s16.8-row-major-aref)
   (s32.4-load  #:vmovdqu s32.4  s32vec s32-array s32.4-aref  s32.4-row-major-aref)
   (s64.2-load  #:vmovdqu s64.2  s64vec s64-array s64.2-aref  s64.2-row-major-aref)
   (s8.32-load  #:vmovdqu s8.32  s8vec  s8-array  s8.32-aref  s8.32-row-major-aref)
   (s16.16-load #:vmovdqu s16.16 s16vec s16-array s16.16-aref s16.16-row-major-aref)
   (s32.8-load  #:vmovdqu s32.8  s32vec s32-array s32.8-aref  s32.8-row-major-aref)
   (s64.4-load  #:vmovdqu s64.4  s64vec s64-array s64.4-aref  s64.4-row-major-aref))
  (:stores
   ;; Each row lists: store name, mnemonic, value type, vector type, array
   ;; type, and the names of the aref-style and row-major-aref-style accessors.
   ;; String stores exist only when the :sb-unicode feature is present.
   #+sb-unicode
   (u32.4-store-into-string #:vmovdqu u32.4 charvec char-array u32.4-string-ref u32.4-row-major-string-ref)
   #+sb-unicode
   (u32.8-store-into-string #:vmovdqu u32.8 charvec char-array u32.8-string-ref u32.8-row-major-string-ref)
   ;; Scalar floating-point stores.
   (f32-store      #:vmovss   f32    f32vec f32-array f32-aref    f32-row-major-aref)
   (f64-store      #:vmovsd   f64    f64vec f64-array f64-aref    f64-row-major-aref)
   ;; Floating-point pack stores.
   (f32.4-store    #:vmovups  f32.4  f32vec f32-array f32.4-aref  f32.4-row-major-aref)
   (f64.2-store    #:vmovupd  f64.2  f64vec f64-array f64.2-aref  f64.2-row-major-aref)
   (f32.8-store    #:vmovups  f32.8  f32vec f32-array f32.8-aref  f32.8-row-major-aref)
   (f64.4-store    #:vmovupd  f64.4  f64vec f64-array f64.4-aref  f64.4-row-major-aref)
   ;; 128-bit integer pack stores.
   (u8.16-store    #:vmovdqu  u8.16  u8vec  u8-array  u8.16-aref  u8.16-row-major-aref)
   (u16.8-store    #:vmovdqu  u16.8  u16vec u16-array u16.8-aref  u16.8-row-major-aref)
   (u32.4-store    #:vmovdqu  u32.4  u32vec u32-array u32.4-aref  u32.4-row-major-aref)
   (u64.2-store    #:vmovdqu  u64.2  u64vec u64-array u64.2-aref  u64.2-row-major-aref)
   (s8.16-store    #:vmovdqu  s8.16  s8vec  s8-array  s8.16-aref  s8.16-row-major-aref)
   (s16.8-store    #:vmovdqu  s16.8  s16vec s16-array s16.8-aref  s16.8-row-major-aref)
   (s32.4-store    #:vmovdqu  s32.4  s32vec s32-array s32.4-aref  s32.4-row-major-aref)
   (s64.2-store    #:vmovdqu  s64.2  s64vec s64-array s64.2-aref  s64.2-row-major-aref)
   ;; 256-bit integer pack stores.
   (u8.32-store    #:vmovdqu  u8.32  u8vec  u8-array  u8.32-aref  u8.32-row-major-aref)
   (u16.16-store   #:vmovdqu  u16.16 u16vec u16-array u16.16-aref u16.16-row-major-aref)
   (u32.8-store    #:vmovdqu  u32.8  u32vec u32-array u32.8-aref  u32.8-row-major-aref)
   (u64.4-store    #:vmovdqu  u64.4  u64vec u64-array u64.4-aref  u64.4-row-major-aref)
   (s8.32-store    #:vmovdqu  s8.32  s8vec  s8-array  s8.32-aref  s8.32-row-major-aref)
   (s16.16-store   #:vmovdqu  s16.16 s16vec s16-array s16.16-aref s16.16-row-major-aref)
   (s32.8-store    #:vmovdqu  s32.8  s32vec s32-array s32.8-aref  s32.8-row-major-aref)
   (s64.4-store    #:vmovdqu  s64.4  s64vec s64-array s64.4-aref  s64.4-row-major-aref)
   ;; Non-temporal stores (vmovntps / vmovntpd / vmovntdq).
   (f32.4-ntstore  #:vmovntps f32.4  f32vec f32-array f32.4-non-temporal-aref  f32.4-non-temporal-row-major-aref)
   (f64.2-ntstore  #:vmovntpd f64.2  f64vec f64-array f64.2-non-temporal-aref  f64.2-non-temporal-row-major-aref)
   (f32.8-ntstore  #:vmovntps f32.8  f32vec f32-array f32.8-non-temporal-aref  f32.8-non-temporal-row-major-aref)
   (f64.4-ntstore  #:vmovntpd f64.4  f64vec f64-array f64.4-non-temporal-aref  f64.4-non-temporal-row-major-aref)
   (u8.16-ntstore  #:vmovntdq u8.16  u8vec  u8-array  u8.16-non-temporal-aref  u8.16-non-temporal-row-major-aref)
   (u16.8-ntstore  #:vmovntdq u16.8  u16vec u16-array u16.8-non-temporal-aref  u16.8-non-temporal-row-major-aref)
   (u32.4-ntstore  #:vmovntdq u32.4  u32vec u32-array u32.4-non-temporal-aref  u32.4-non-temporal-row-major-aref)
   (u64.2-ntstore  #:vmovntdq u64.2  u64vec u64-array u64.2-non-temporal-aref  u64.2-non-temporal-row-major-aref)
   (s8.16-ntstore  #:vmovntdq s8.16  s8vec  s8-array  s8.16-non-temporal-aref  s8.16-non-temporal-row-major-aref)
   (s16.8-ntstore  #:vmovntdq s16.8  s16vec s16-array s16.8-non-temporal-aref  s16.8-non-temporal-row-major-aref)
   (s32.4-ntstore  #:vmovntdq s32.4  s32vec s32-array s32.4-non-temporal-aref  s32.4-non-temporal-row-major-aref)
   (s64.2-ntstore  #:vmovntdq s64.2  s64vec s64-array s64.2-non-temporal-aref  s64.2-non-temporal-row-major-aref)
   (u8.32-ntstore  #:vmovntdq u8.32  u8vec  u8-array  u8.32-non-temporal-aref  u8.32-non-temporal-row-major-aref)
   (u16.16-ntstore #:vmovntdq u16.16 u16vec u16-array u16.16-non-temporal-aref u16.16-non-temporal-row-major-aref)
   (u32.8-ntstore  #:vmovntdq u32.8  u32vec u32-array u32.8-non-temporal-aref  u32.8-non-temporal-row-major-aref)
   (u64.4-ntstore  #:vmovntdq u64.4  u64vec u64-array u64.4-non-temporal-aref  u64.4-non-temporal-row-major-aref)
   (s8.32-ntstore  #:vmovntdq s8.32  s8vec  s8-array  s8.32-non-temporal-aref  s8.32-non-temporal-row-major-aref)
   (s16.16-ntstore #:vmovntdq s16.16 s16vec s16-array s16.16-non-temporal-aref s16.16-non-temporal-row-major-aref)
   (s32.8-ntstore  #:vmovntdq s32.8  s32vec s32-array s32.8-non-temporal-aref  s32.8-non-temporal-row-major-aref)
   (s64.4-ntstore  #:vmovntdq s64.4  s64vec s64-array s64.4-non-temporal-aref  s64.4-non-temporal-row-major-aref))
  (:associatives
   ;; Each row is (name two-argument-function third-element).  The third
   ;; element is a constant such as +f32-true+, 0f0 or 1; it is NIL for every
   ;; max/min row.
   ;; NOTE(review): the third element is presumably the neutral element used
   ;; when the function is called without arguments -- confirm against the
   ;; define-instruction-set macro.
   ;; Scalar f32 and f64.
   (f32-and two-arg-f32-and +f32-true+)
   (f32-or  two-arg-f32-or  +f32-false+)
   (f32-xor two-arg-f32-xor +f32-false+)
   (f32-max two-arg-f32-max nil)
   (f32-min two-arg-f32-min nil)
   (f32+    two-arg-f32+ 0f0)
   (f32*    two-arg-f32* 1f0)
   (f64-and two-arg-f64-and +f64-true+)
   (f64-or  two-arg-f64-or  +f64-false+)
   (f64-xor two-arg-f64-xor +f64-false+)
   (f64-max two-arg-f64-max nil)
   (f64-min two-arg-f64-min nil)
   (f64+    two-arg-f64+ 0d0)
   (f64*    two-arg-f64* 1d0)
   ;; Floating-point packs.
   (f32.4-and two-arg-f32.4-and +f32-true+)
   (f32.4-or  two-arg-f32.4-or  +f32-false+)
   (f32.4-xor two-arg-f32.4-xor +f32-false+)
   (f32.4-max two-arg-f32.4-max nil)
   (f32.4-min two-arg-f32.4-min nil)
   (f32.4+    two-arg-f32.4+ 0f0)
   (f32.4*    two-arg-f32.4* 1f0)
   (f64.2-and two-arg-f64.2-and +f64-true+)
   (f64.2-or  two-arg-f64.2-or  +f64-false+)
   (f64.2-xor two-arg-f64.2-xor +f64-false+)
   (f64.2-max two-arg-f64.2-max nil)
   (f64.2-min two-arg-f64.2-min nil)
   (f64.2+    two-arg-f64.2+ 0d0)
   (f64.2*    two-arg-f64.2* 1d0)
   (f32.8-and two-arg-f32.8-and +f32-true+)
   (f32.8-or  two-arg-f32.8-or  +f32-false+)
   (f32.8-xor two-arg-f32.8-xor +f32-false+)
   (f32.8-max two-arg-f32.8-max nil)
   (f32.8-min two-arg-f32.8-min nil)
   (f32.8+    two-arg-f32.8+ 0f0)
   (f32.8*    two-arg-f32.8* 1f0)
   (f64.4-and two-arg-f64.4-and +f64-true+)
   (f64.4-or  two-arg-f64.4-or  +f64-false+)
   (f64.4-xor two-arg-f64.4-xor +f64-false+)
   (f64.4-max two-arg-f64.4-max nil)
   (f64.4-min two-arg-f64.4-min nil)
   (f64.4+    two-arg-f64.4+ 0d0)
   (f64.4*    two-arg-f64.4* 1d0)
   ;; 128-bit unsigned integer packs.
   (u8.16-and two-arg-u8.16-and +u8-true+)
   (u8.16-or  two-arg-u8.16-or  +u8-false+)
   (u8.16-xor two-arg-u8.16-xor +u8-false+)
   (u8.16+    two-arg-u8.16+ 0)
   (u16.8-and two-arg-u16.8-and +u16-true+)
   (u16.8-or  two-arg-u16.8-or  +u16-false+)
   (u16.8-xor two-arg-u16.8-xor +u16-false+)
   (u16.8+    two-arg-u16.8+ 0)
   (u32.4-and two-arg-u32.4-and +u32-true+)
   (u32.4-or  two-arg-u32.4-or  +u32-false+)
   (u32.4-xor two-arg-u32.4-xor +u32-false+)
   (u32.4+    two-arg-u32.4+ 0)
   (u64.2-and two-arg-u64.2-and +u64-true+)
   (u64.2-or  two-arg-u64.2-or  +u64-false+)
   (u64.2-xor two-arg-u64.2-xor +u64-false+)
   (u64.2+    two-arg-u64.2+ 0)
   ;; 128-bit signed integer packs.
   (s8.16-and two-arg-s8.16-and +s8-true+)
   (s8.16-or  two-arg-s8.16-or  +s8-false+)
   (s8.16-xor two-arg-s8.16-xor +s8-false+)
   (s8.16+    two-arg-s8.16+ 0)
   (s16.8-and two-arg-s16.8-and +s16-true+)
   (s16.8-or  two-arg-s16.8-or  +s16-false+)
   (s16.8-xor two-arg-s16.8-xor +s16-false+)
   (s16.8+    two-arg-s16.8+ 0)
   (s16.8-mullo two-arg-s16.8-mullo 1)
   (s32.4-and two-arg-s32.4-and +s32-true+)
   (s32.4-or  two-arg-s32.4-or  +s32-false+)
   (s32.4-xor two-arg-s32.4-xor +s32-false+)
   (s32.4+    two-arg-s32.4+ 0)
   (s32.4-mullo two-arg-s32.4-mullo 1)
   (s64.2-and two-arg-s64.2-and +s64-true+)
   (s64.2-or  two-arg-s64.2-or  +s64-false+)
   (s64.2-xor two-arg-s64.2-xor +s64-false+)
   (s64.2+    two-arg-s64.2+ 0)
   ;; The next two functions are only required for implementing
   ;; floating-point comparisons and unequals.
   (u32.8-and two-arg-u32.8-and +u32-true+)
   (u64.4-and two-arg-u64.4-and +u64-true+))
  (:comparisons
   ;; Each row is (name two-argument-comparison and-function truth-constant).
   ;; The and-function is always the unsigned one of matching lane width and
   ;; count, also for the signed and floating-point rows; this agrees with
   ;; the instruction table, where e.g. two-arg-s8.16= returns a u8.16.
   ;; NOTE(review): presumably the and-function folds the pairwise results of
   ;; a call with more than two arguments -- confirm against the macro.
   ;; Scalar f32 and f64.
   (f32=  two-arg-f32=  u32-and +u32-true+)
   (f32<  two-arg-f32<  u32-and +u32-true+)
   (f32<= two-arg-f32<= u32-and +u32-true+)
   (f32>  two-arg-f32>  u32-and +u32-true+)
   (f32>= two-arg-f32>= u32-and +u32-true+)
   (f64=  two-arg-f64=  u64-and +u64-true+)
   (f64<  two-arg-f64<  u64-and +u64-true+)
   (f64<= two-arg-f64<= u64-and +u64-true+)
   (f64>  two-arg-f64>  u64-and +u64-true+)
   (f64>= two-arg-f64>= u64-and +u64-true+)
   ;; Floating-point packs.
   (f32.4=  two-arg-f32.4=  u32.4-and +u32-true+)
   (f32.4<  two-arg-f32.4<  u32.4-and +u32-true+)
   (f32.4<= two-arg-f32.4<= u32.4-and +u32-true+)
   (f32.4>  two-arg-f32.4>  u32.4-and +u32-true+)
   (f32.4>= two-arg-f32.4>= u32.4-and +u32-true+)
   (f64.2=  two-arg-f64.2=  u64.2-and +u64-true+)
   (f64.2<  two-arg-f64.2<  u64.2-and +u64-true+)
   (f64.2<= two-arg-f64.2<= u64.2-and +u64-true+)
   (f64.2>  two-arg-f64.2>  u64.2-and +u64-true+)
   (f64.2>= two-arg-f64.2>= u64.2-and +u64-true+)
   (f32.8=  two-arg-f32.8=  u32.8-and +u32-true+)
   (f32.8<  two-arg-f32.8<  u32.8-and +u32-true+)
   (f32.8<= two-arg-f32.8<= u32.8-and +u32-true+)
   (f32.8>  two-arg-f32.8>  u32.8-and +u32-true+)
   (f32.8>= two-arg-f32.8>= u32.8-and +u32-true+)
   (f64.4=  two-arg-f64.4=  u64.4-and +u64-true+)
   (f64.4<  two-arg-f64.4<  u64.4-and +u64-true+)
   (f64.4<= two-arg-f64.4<= u64.4-and +u64-true+)
   (f64.4>  two-arg-f64.4>  u64.4-and +u64-true+)
   (f64.4>= two-arg-f64.4>= u64.4-and +u64-true+)
   ;; 128-bit unsigned integer packs.
   (u8.16=  two-arg-u8.16=  u8.16-and +u8-true+)
   (u8.16<  two-arg-u8.16<  u8.16-and +u8-true+)
   (u8.16<= two-arg-u8.16<= u8.16-and +u8-true+)
   (u8.16>  two-arg-u8.16>  u8.16-and +u8-true+)
   (u8.16>= two-arg-u8.16>= u8.16-and +u8-true+)
   (u16.8=  two-arg-u16.8=  u16.8-and +u16-true+)
   (u16.8<  two-arg-u16.8<  u16.8-and +u16-true+)
   (u16.8<= two-arg-u16.8<= u16.8-and +u16-true+)
   (u16.8>  two-arg-u16.8>  u16.8-and +u16-true+)
   (u16.8>= two-arg-u16.8>= u16.8-and +u16-true+)
   (u32.4=  two-arg-u32.4=  u32.4-and +u32-true+)
   (u32.4<  two-arg-u32.4<  u32.4-and +u32-true+)
   (u32.4<= two-arg-u32.4<= u32.4-and +u32-true+)
   (u32.4>  two-arg-u32.4>  u32.4-and +u32-true+)
   (u32.4>= two-arg-u32.4>= u32.4-and +u32-true+)
   (u64.2=  two-arg-u64.2=  u64.2-and +u64-true+)
   (u64.2<  two-arg-u64.2<  u64.2-and +u64-true+)
   (u64.2<= two-arg-u64.2<= u64.2-and +u64-true+)
   (u64.2>  two-arg-u64.2>  u64.2-and +u64-true+)
   (u64.2>= two-arg-u64.2>= u64.2-and +u64-true+)
   ;; 128-bit signed integer packs.
   (s8.16=  two-arg-s8.16=  u8.16-and +u8-true+)
   (s8.16<  two-arg-s8.16<  u8.16-and +u8-true+)
   (s8.16<= two-arg-s8.16<= u8.16-and +u8-true+)
   (s8.16>  two-arg-s8.16>  u8.16-and +u8-true+)
   (s8.16>= two-arg-s8.16>= u8.16-and +u8-true+)
   (s16.8=  two-arg-s16.8=  u16.8-and +u16-true+)
   (s16.8<  two-arg-s16.8<  u16.8-and +u16-true+)
   (s16.8<= two-arg-s16.8<= u16.8-and +u16-true+)
   (s16.8>  two-arg-s16.8>  u16.8-and +u16-true+)
   (s16.8>= two-arg-s16.8>= u16.8-and +u16-true+)
   (s32.4=  two-arg-s32.4=  u32.4-and +u32-true+)
   (s32.4<  two-arg-s32.4<  u32.4-and +u32-true+)
   (s32.4<= two-arg-s32.4<= u32.4-and +u32-true+)
   (s32.4>  two-arg-s32.4>  u32.4-and +u32-true+)
   (s32.4>= two-arg-s32.4>= u32.4-and +u32-true+)
   (s64.2=  two-arg-s64.2=  u64.2-and +u64-true+)
   (s64.2<  two-arg-s64.2<  u64.2-and +u64-true+)
   (s64.2<= two-arg-s64.2<= u64.2-and +u64-true+)
   (s64.2>  two-arg-s64.2>  u64.2-and +u64-true+)
   (s64.2>= two-arg-s64.2>= u64.2-and +u64-true+))
  (:ifs
   ;; Each row pairs an X-if name with the X-blend function of the same pack
   ;; type.  Only the 128-bit integer packs and the four floating-point packs
   ;; are listed.
   (f32.4-if f32.4-blend)
   (f32.8-if f32.8-blend)
   (f64.2-if f64.2-blend)
   (f64.4-if f64.4-blend)
   (u8.16-if u8.16-blend)
   (u16.8-if u16.8-blend)
   (u32.4-if u32.4-blend)
   (u64.2-if u64.2-blend)
   (s8.16-if s8.16-blend)
   (s16.8-if s16.8-blend)
   (s32.4-if s32.4-blend)
   (s64.2-if s64.2-blend))
  (:reducers
   ;; Each row is (name two-argument-function constant).  The constant is a
   ;; zero for every subtraction row and a one for every division row.
   ;; NOTE(review): presumably the constant is the left operand used for a
   ;; single-argument call (negation / reciprocal) -- confirm against the
   ;; define-instruction-set macro.
   ;; Scalar f32 and f64.
   (f32- two-arg-f32- 0f0)
   (f32/ two-arg-f32/ 1f0)
   (f64- two-arg-f64- 0d0)
   (f64/ two-arg-f64/ 1d0)
   ;; Floating-point packs.
   (f32.4- two-arg-f32.4- 0f0)
   (f32.4/ two-arg-f32.4/ 1f0)
   (f64.2- two-arg-f64.2- 0d0)
   (f64.2/ two-arg-f64.2/ 1d0)
   (f32.8- two-arg-f32.8- 0f0)
   (f32.8/ two-arg-f32.8/ 1f0)
   (f64.4- two-arg-f64.4- 0d0)
   (f64.4/ two-arg-f64.4/ 1d0)
   ;; 128-bit integer packs (subtraction only).
   (u8.16- two-arg-u8.16- 0)
   (u16.8- two-arg-u16.8- 0)
   (u32.4- two-arg-u32.4- 0)
   (u64.2- two-arg-u64.2- 0)
   (s8.16- two-arg-s8.16- 0)
   (s16.8- two-arg-s16.8- 0)
   (s32.4- two-arg-s32.4- 0)
   (s64.2- two-arg-s64.2- 0))
  (:unequals
   ;; Each row is (name two-argument-/= and-function truth-constant), using
   ;; the same unsigned and-functions and truth constants as the comparison
   ;; rows of the same pack type.  The last line also closes the whole
   ;; define-instruction-set form.
   (f32/= two-arg-f32/= u32-and +u32-true+)
   (f64/= two-arg-f64/= u64-and +u64-true+)
   (f32.4/= two-arg-f32.4/= u32.4-and +u32-true+)
   (f64.2/= two-arg-f64.2/= u64.2-and +u64-true+)
   (f32.8/= two-arg-f32.8/= u32.8-and +u32-true+)
   (f64.4/= two-arg-f64.4/= u64.4-and +u64-true+)
   (u8.16/= two-arg-u8.16/= u8.16-and +u8-true+)
   (u16.8/= two-arg-u16.8/= u16.8-and +u16-true+)
   (u32.4/= two-arg-u32.4/= u32.4-and +u32-true+)
   (u64.2/= two-arg-u64.2/= u64.2-and +u64-true+)
   (s8.16/= two-arg-s8.16/= u8.16-and +u8-true+)
   (s16.8/= two-arg-s16.8/= u16.8-and +u16-true+)
   (s32.4/= two-arg-s32.4/= u32.4-and +u32-true+)
   (s64.2/= two-arg-s64.2/= u64.2-and +u64-true+)))
